#' ---
#' title: "Replication for When Information Is Not Enough for Strategic Voting"
#' subtitle: "Experimental Data Analysis"
#' author: "Lukas F. Stoetzer, Benjamin Schlegel, Patrick Kraft"
#' date: "September 2023"
#' ---


# Libraries
  library(tidyverse)
  library("summarytools")
  library("kableExtra")
  require(gridExtra)
  library(texreg)
  library(xtable)

# Just to make sure what we mean by select
  select <- dplyr::select


# Prepare Data ===============
  
  # DF Pretest
  # Pilot wave: keep the completion time plus the covariate and treatment
  # column ranges, rename the treatment columns, and recode every knowledge
  # item to TRUE when the response equals the answer string listed below.
  df_pretest <- read.csv("00_202901_pretest_data.csv") %>%
    select(duration_sec = Duration..in.seconds.,
           polint_1:income,
           information:motivation) %>%
    rename(cognitive_load = capabilities, incentive = motivation) %>%
    mutate(
      know_magna      = know_magna == "1215",
      know_ecb        = know_ecb == "Christine Lagarde",
      know_crisis     = know_crisis == "Inflation",
      know_levy       = know_levy == "Redistribution system with the purpose of reducing consumption",
      know_spend      = know_spend == "Pensions",
      know_scotland   = know_scotland == "It devolves additional powers to Holyrood.",
      know_premature  = know_premature == "If a majority of two thirds in the House of Commons decides to do so.",
      know_parliament = know_parliament == "The House of Commons",
      know_commons    = know_commons == "Conservatives",
      know_lords      = know_lords == "Conservatives",
      know_windrush   = know_windrush == "Amber Rudd",
      # Knowledge score: number of matches on the three items that are also
      # recoded for the main study below.
      know_score      = know_spend + know_scotland + know_commons,
      test            = "Pre-Test"
    )

  # DF Main
  # Main wave: restricted to finished interviews with explicit consent, then
  # the same column selection, renaming and knowledge score as for the pilot.
  df_main <- read_csv("00_200925_main_data.csv") %>%
    filter(Finished & agree == "I agree to participate in this study.") %>%
    select(duration_sec = `Duration (in seconds)`,
           polint_1:income,
           information:motivation) %>%
    rename(cognitive_load = capabilities, incentive = motivation) %>%
    mutate(
      know_spend    = know_spend == "Pensions",
      know_scotland = know_scotland == "It devolves additional powers to Holyrood.",
      know_commons  = know_commons == "Conservatives",
      know_score    = know_spend + know_scotland + know_commons,
      test          = "Main Study"
    )

  # Stack both waves; `test` identifies the wave of each row.
  df <- bind_rows(df_main, df_pretest)

# Descriptive Statistics ==========
  
  covar <- c("age", "gender", "country", "race", "educ", "income")

  # Income brackets at or above GBP 50,000; any other response is coded as
  # "below 50,000" by add_covariate_indicators().
  high_income_brackets <- c("£50,000-£59,999", "£60,000-£74,999",
                            "£75,000-£99,999", "£100,000 or more")

  # Covariates reported in Tables A3-A6, in table order. The names must not
  # contain non-alphanumeric characters because write_descriptive_table()
  # splits "<variable>_<statistic>" with separate().
  covariate_vars <- c("Female", "England", "White", "IncomeBelow50000",
                      "University", "Knowledge", "PidLabour",
                      "PidConservative", "Ideology")

  # Adds the covariate columns listed in `covariate_vars` to `data`.
  # Indicators are 1/0 and stay NA where the underlying answer is NA
  # (except the income indicator, which uses %in% and is never NA).
  add_covariate_indicators <- function(data) {
    data %>%
      mutate(
        # Previously coded as gender == "Male", i.e. the column labelled
        # "Female" reported the share of men.
        # NOTE(review): assumes women are coded "Female" in the raw data;
        # confirm against the survey export.
        Female = ifelse(gender == "Female", 1, 0),
        England = ifelse(country == "England", 1, 0),
        White = ifelse(race == "White", 1, 0),
        University = ifelse(educ == "University/CNAA", 1, 0),
        # The cut-off is GBP 50,000 (the old label "IncomeBelow5000" dropped
        # a zero).
        IncomeBelow50000 = ifelse(income %in% high_income_brackets, 0, 1),
        Knowledge = know_score,
        PidLabour = ifelse(pid == "Labour", 1, 0),
        PidConservative = ifelse(pid == "Conservative", 1, 0),
        Ideology = ideol_1
      )
  }

  # Writes a LaTeX table with mean, standard deviation, minimum and maximum of
  # every covariate in `data` to `file`.
  write_descriptive_table <- function(data, file) {
    tab <- data %>%
      add_covariate_indicators() %>%
      summarise_at(covariate_vars,
                   list(Mean = mean, Std = sd, Min = min, Max = max),
                   na.rm = TRUE) %>%
      gather(var, val) %>%
      separate(var, c("var", "par")) %>%
      spread(par, val) %>%
      dplyr::select(var, Mean, Std, Min, Max) %>%
      rename(Variable = "var") %>%
      kable("latex", booktabs = TRUE, align = "c", digits = 2)
    invisible(write_file(tab, file))
  }

  # Writes a LaTeX table with covariate means per treatment cell
  # (cognitive_load x information x incentive) to `file`. Rows in the
  # "distractor" arm are excluded.
  write_balance_table <- function(data, file) {
    balance_vars <- c("Female", "England", "White", "IncomeBelow50000",
                      "University", "Knowledge", "Pid Labour",
                      "Pid Conservative", "Ideology")
    tab <- data %>%
      filter(cognitive_load != "distractor") %>%
      add_covariate_indicators() %>%
      rename("Pid Labour" = PidLabour, "Pid Conservative" = PidConservative) %>%
      group_by(cognitive_load, information, incentive) %>%
      summarise_at(balance_vars, mean, na.rm = TRUE) %>%
      kable("latex", booktabs = TRUE, align = "c", digits = 2)
    invisible(write_file(tab, file))
  }

  # Table A3
  write_descriptive_table(filter(df, test == "Pre-Test"), "tab_A3.tex")

  # Table A4
  write_descriptive_table(filter(df, test == "Main Study"), "tab_A4.tex")


# Randomization Statistics ==========

  # Table Pre-Test
  write_balance_table(filter(df, test == "Pre-Test"), "tab_A5.tex")

  # Table Main
  write_balance_table(filter(df, test == "Main Study"), "tab_A6.tex")

    
# Manipulation Checks ================
  
  # cognitive_load
  # Share of respondents whose `iq` answer is "d", per cognitive-load arm and
  # study wave, with a binomial standard error and a 95% normal interval.
  df %>%
    mutate(iq = as.numeric(iq == "d")) %>%
    group_by(cognitive_load, test) %>%
    summarise(Concentration = mean(iq),
              SE = sqrt((Concentration * (1 - Concentration)) / n())) %>%
    arrange(Concentration) %>%
    ggplot(aes(shape = test)) +
    geom_pointrange(
      aes(x = cognitive_load,
          y = Concentration,
          ymin = Concentration - qnorm(0.975) * SE,
          ymax = Concentration + qnorm(0.975) * SE),
      size = 1.1
    ) +
    facet_grid(~ test) +
    theme_minimal() +
    xlab("Cognitive Load Treatment") +
    ylab("IQ test Correct") +
    theme(legend.position = "none")

  # Saves the plot built above (ggsave defaults to the last plot).
  ggsave("fig_A3_1.pdf", width = 6, height = 6)
  
  
  # Information
  # Mean of `probability_1` and its standard error per information arm and
  # study wave, drawn with +/- 1.67 standard errors.
  df %>%
    group_by(information, test) %>%
    # `funs()` has been deprecated since dplyr 0.8.0; a named list of
    # functions/lambdas produces the same `mean` and `se` columns.
    summarize_at("probability_1",
                 list(mean = mean, se = ~ sd(.) / sqrt(n()))) %>%
    ggplot(aes(shape = test)) +
    geom_pointrange(aes(y = mean, ymax = mean + 1.67 * se,
                        ymin = mean - 1.67 * se, x = information),
                    size = 1.1) +
    facet_grid(~ test) +
    theme_minimal() +
    ylab("Probability Candidate A") +
    ylim(35, 45) +
    xlab("Information Treatment") +
    theme(legend.position = "none")

  ggsave("fig_A3_2.pdf", width = 6, height = 6)
  
  
  # Incentive
  # Flags whether the stated reason for the vote mentions the sincere
  # ("closest to my position") or the viability ("realistic chance") answer.
  # str_detect() takes the string to search first and the pattern second; the
  # arguments used to be swapped, so each response was interpreted as a regular
  # expression and matched against the fixed answer text. fixed() makes the
  # answer text a literal pattern.
  df_plot <- df %>%
    mutate(
      reason_sincere = ifelse(
        str_detect(as.character(reason),
                   fixed("Is closest to my position")),
        1, 0
      ),
      reason_chance = ifelse(
        str_detect(as.character(reason),
                   fixed("Has a realistic chance to win the election")),
        1, 0
      )
    )

  # Share (and standard error) of each reason per incentive arm and study
  # wave, drawn with +/- 1.67 standard errors.
  df_plot %>%
    group_by(incentive, test) %>%
    # `funs()` has been deprecated since dplyr 0.8.0; the named list yields the
    # same `<variable>_mean` / `<variable>_se` columns.
    summarize_at(c("reason_chance", "reason_sincere"),
                 list(mean = mean, se = ~ sd(.) / sqrt(n()))) %>%
    gather(var, val, -incentive, -test) %>%
    # "reason_chance_mean" -> rea = "reason", type = "chance", par = "mean"
    separate(var, c("rea", "type", "par")) %>%
    spread(par, val) %>%
    mutate(type = ifelse(type == "chance",
                         "Candidate has realistic chance to win",
                         "Candidate is closest to my position")) %>%
    ggplot(aes(shape = test)) +
    geom_pointrange(aes(y = mean, ymax = mean + 1.67 * se,
                        ymin = mean - 1.67 * se, x = incentive),
                    size = 1.1) +
    facet_grid(type ~ test, scales = "free_y") +
    theme_minimal() +
    ylab("Share") +
    xlab("Incentive Treatment") +
    theme(legend.position = "none")

  ggsave("fig_A3_3.pdf", width = 9, height = 6)


# Treatment Effects ================
  
  # Figure
  # 1/0 indicator for a vote for Candidate B; its mean is plotted below as the
  # share of strategic votes.
  df <- df %>%
    mutate(voteB = as.numeric(vote == "Candidate B"))

  # Cell shares with binomial standard errors per treatment combination and
  # study wave (the "distractor" arm is excluded), followed by display labels.
  df_plot <- df %>%
    filter(cognitive_load != "distractor") %>%
    group_by(cognitive_load, information, incentive, test) %>%
    summarise(StrtVote = mean(voteB),
              n = n(),
              SE = sqrt((StrtVote * (1 - StrtVote)) / n())) %>%
    arrange(StrtVote) %>%
    mutate(
      incentive = factor(incentive,
                         levels = c("bonus", "none"),
                         labels = c("High Incentive", "Low Incentive")),
      # "blinking" is shown as "Low", "none" as "High"
      cognitive_load = factor(cognitive_load,
                              levels = c("blinking", "none"),
                              labels = c("Low", "High")),
      information = factor(information,
                           levels = c("unprecise", "exact"),
                           labels = c("Low", "High")),
      test = factor(test,
                    levels = c("Main Study", "Pre-Test"),
                    labels = c("Pre-Registered Main Study", "Pilot Study"))
    ) %>%
    mutate(
      label = case_when(
        information == "Low" & cognitive_load == "Low" ~ "Control",
        information == "High" & cognitive_load == "Low" ~ "Inf.",
        information == "Low" & cognitive_load == "High" ~ "Cogn. Res.",
        information == "High" & cognitive_load == "High" ~ "Inf. + Cogn. Res."
      ),
      label = factor(
        label,
        levels = rev(c("Inf. + Cogn. Res.", "Inf.", "Cogn. Res.", "Control")),
        labels = rev(c("Inf. + \n Cogn. Res.", "Inf.", "Cogn. Res.", "Control"))
      )
    )

  # Bars with 95% normal intervals; the aesthetics are specified horizontally
  # and then flipped by coord_flip().
  ggplot(df_plot,
         aes(x = StrtVote,
             y = label,
             xmin = StrtVote - qnorm(0.975) * SE,
             xmax = StrtVote + qnorm(0.975) * SE)) +
    facet_grid(test ~ incentive) +
    geom_col(position = "dodge") +
    coord_flip() +
    geom_errorbarh(height = 0, position = position_dodge(width = .9)) +
    geom_point() +
    theme_minimal() +
    scale_fill_grey() +
    labs(x = "Share Strategic Vote", y = "") +
    scale_x_continuous(labels = scales::percent) +
    theme(legend.position = "bottom",
          text = element_text(size = 16))

  ggsave("fig_A2.pdf", width = 12, height = 8)


  # Analysis variables (both waves): reference levels, treatment dummies and
  # the share of "Neutral" answers on the vaa items
  df <- df %>%
    mutate(incentive = relevel(as.factor(incentive), "none"), # Relevel
           information = relevel(as.factor(information), "unprecise")) %>%
    # M: bonus incentive, I: exact information, C: no cognitive load
    mutate(M = ifelse(incentive == "bonus", 1, 0),
           I = ifelse(information == "exact", 1, 0),
           C = ifelse(cognitive_load == "none", 1, 0)) %>%
    # `funs()` has been deprecated since dplyr 0.8.0; a single lambda recodes
    # the matched columns in place exactly as before.
    mutate_at(vars(starts_with("vaa")), ~ ifelse(. == "Neutral", 1, 0)) %>%
    mutate(vaa_neutral = (vaa_1 + vaa_2 + vaa_3 + vaa_4 + vaa_5 + vaa_6 + vaa_7) / 7)
  
  

  # Regression Analysis
  # Linear probability models of voting for Candidate B on the treatment
  # dummies: additive (suffix 0) and fully interacted, fitted per study wave.
  m_main0 <- df %>%
    filter(test == "Main Study") %>%
    lm(voteB ~ M + I + C, data = .)
  m_ptst0 <- df %>%
    filter(test == "Pre-Test") %>%
    lm(voteB ~ M + I + C, data = .)
  m_main <- df %>%
    filter(test == "Main Study") %>%
    lm(voteB ~ M * I * C, data = .)
  m_ptst <- df %>%
    filter(test == "Pre-Test") %>%
    lm(voteB ~ M * I * C, data = .)

  # Regression Table
  # Columns 1-2 are the pilot models, columns 3-4 the main-study models; the
  # coefficient labels follow the term order of the interacted models.
  texreg(
    list(m_ptst0, m_ptst, m_main0, m_main),
    file = "tab_A7.tex",
    caption = "Regression Results Experimental Study",
    float.pos = "ht",
    custom.header = list("Pilot" = 1:2, "Main Study" = 3:4),
    custom.coef.names = c(
      "Intercept",
      "Incentive",
      "Information",
      "Cognitive Load",
      "Incentive x Information",
      "Incentive x Cognitive Load",
      "Information x Cognitive Load",
      "Incentive x Information x Cognitive Load"
    )
  )
  
  
  # Calculate all expected Cases
  # Design matrix (without intercept) for the eight M x I x C cells. The column
  # order matches the non-intercept coefficients of the interacted models
  # (M, I, C, M:I, M:C, I:C, M:I:C).
  # Note: from here on `df` no longer holds the survey data.
  df <- expand.grid("M" = 0:1, "I" = 0:1, "C" = 0:1) %>%
    mutate("M:I" = M * I, "M:C" = M * C,
           "I:C" = I * C, "M:I:C" = M * C * I)
  X <- df

  # NOTE(review): because X contains the M column, the estimates for M == 1
  # rows include the main effect of M, i.e. they are differences from the
  # M == 0, I == 0, C == 0 cell. Confirm this is the intended baseline.

  # The standard errors below come from simulated coefficient draws; fix the
  # seed so that the reported numbers can be reproduced exactly.
  set.seed(20230901)

  # Main Study
  # as.numeric() stores the estimates as a plain vector, not a 1-column matrix.
  df$est_main <- as.numeric(as.matrix(X) %*% coef(m_main)[-1])
  S <- MASS::mvrnorm(5000, coef(m_main)[-1], vcov(m_main)[-1, -1])
  # Standard deviation of each cell's linear combination across the draws
  df$se_main <- apply(t(as.matrix(X) %*% t(S)), 2, sd)

  # Pilot Study
  df$est_ptst <- as.numeric(as.matrix(X) %*% coef(m_ptst)[-1])
  S <- MASS::mvrnorm(5000, coef(m_ptst)[-1], vcov(m_ptst)[-1, -1])
  df$se_ptst <- apply(t(as.matrix(X) %*% t(S)), 2, sd)

  # Reshape
  # One row per cell and study ("main"/"ptst") with columns `est` and `se`
  df <- select(df, -"M:I", -"M:C", -"M:I:C", -"I:C") %>%
    gather(est, val, -M, -I, -C) %>%
    separate(est, c("est", "test")) %>%
    spread(est, val)

  # Add Labels
  # Display labels for the incentive facet, the treatment combination, the
  # study wave and a significance flag based on 1 - pnorm(est / se).
  df_res <- df %>%
    mutate(
      motivation = factor(ifelse(M == 1, "High Incentive", "Low Incentive"),
                          levels = c("High Incentive", "Low Incentive")),
      label = case_when(
        I == 0 & C == 0 ~ "Control",
        I == 1 & C == 0 ~ "Inf.",
        I == 0 & C == 1 ~ "Cogn. Res.",
        I == 1 & C == 1 ~ "Inf. + Cogn. Res."
      ),
      p = factor(ifelse(1 - pnorm(est / se) > 0.05, "n.s.", "p-val<0.05"),
                 levels = c("p-val<0.05", "n.s."))
    ) %>%
    mutate(
      test = factor(test,
                    levels = c("main", "ptst"),
                    labels = c("Pre-Registered Main Study", "Pilot Study")),
      # "Control" is not among the levels, so control rows become NA here.
      label = factor(
        label,
        levels = rev(c("Inf. + Cogn. Res.", "Inf.", "Cogn. Res.")),
        labels = rev(c("Information &\nCognitive Resources", "Information", "Cognitive Resources"))
      )
    )

  # Plot
  # Point estimates with two intervals per point: +/- 1.67 se (with caps) and
  # +/- 1.96 se (without caps). The filter drops the NA-labelled control rows.
  df_res %>%
    filter(label != "Control") %>%
    ggplot(aes(x = est, y = label, shape = test, col = test)) +
    geom_point(position = position_dodge(width = .3)) +
    geom_errorbarh(aes(xmin = est - 1.67 * se, xmax = est + 1.67 * se),
                   height = .1,
                   position = position_dodge(width = .3)) +
    geom_errorbarh(aes(xmin = est - 1.96 * se, xmax = est + 1.96 * se),
                   height = 0,
                   position = position_dodge(width = .3)) +
    geom_vline(aes(xintercept = 0), col = "red", alpha = 0.2) +
    facet_wrap(~motivation) +
    scale_color_grey() +
    labs(x = "Effect of High Levels of Information and Cognitive Ressources on Strategic Voting\n(vs. Low Levels in Control Condition)",
         y = "") +
    theme_minimal() +
    theme(legend.position = "bottom",
          legend.title = element_blank(),
          text = element_text(size = 16))

  ggsave("fig_2.pdf", width = 12, height = 6)
  
  
  # Marginal Effects
  # Tables A8 (main study) and A9 (pilot): estimates for every cell with at
  # least one of I and C switched on, with table-ready column names.
  tab_main <- df_res %>%
    filter(test == "Pre-Registered Main Study", I == 1 | C == 1) %>%
    transmute("Incentives Condition" = motivation,
              "Political Sophistication" = label,
              "Marginal Effect" = est,
              "Std. Err." = se,
              "P-Value" = p) %>%
    xtable::xtable()

  print(tab_main, file = "tab_A8.tex")

  tab_pilot <- df_res %>%
    filter(test == "Pilot Study", I == 1 | C == 1) %>%
    transmute("Incentives Condition" = motivation,
              "Political Sophistication" = label,
              "Marginal Effect" = est,
              "Std. Err." = se,
              "P-Value" = p) %>%
    xtable::xtable()

  print(tab_pilot, file = "tab_A9.tex")
  

  
  # Test for significant differences between information or capacity alone and
  # information and capacity jointly

  # Estimate, standard error and p-value for the sum of selected coefficients.
  #
  # m:   fitted model that provides coef() and vcov(); defaults to m_main.
  # sel: names of the coefficients to add up.
  #
  # Returns a one-row data.frame with the columns
  #   est  - sum of the selected coefficients,
  #   se   - sqrt(a' V a), with a the 0/1 selection vector and V = vcov(m),
  #   pval - 1 - pnorm(est / se) (upper-tail normal probability).
  # Stops with an error if a name in `sel` is not a coefficient of `m`;
  # previously this silently produced an NA estimate.
  get_estse <- function(m=m_main, sel = c("C","M:C","I:C","M:I:C")){

    beta <- coef(m)
    sel <- unique(sel)

    unknown <- setdiff(sel, names(beta))
    if (length(unknown) > 0) {
      stop("Coefficient(s) not found in model: ",
           paste(unknown, collapse = ", "), call. = FALSE)
    }

    # Get est
    est <- sum(beta[sel])

    # Get se (drop() turns the 1 x 1 matrix product into a scalar)
    a <- as.numeric(names(beta) %in% sel)
    se <- sqrt(drop(t(a) %*% vcov(m) %*% a))

    # pvalue
    pval <- 1 - pnorm(est / se)

    data.frame("est" = est, "se" = se, "pval" = pval)
  }
  
  # Coefficient sets for the step from one factor alone to both factors.
  # Names are "<from>_<to>_<incentive>": "Info" = information only,
  # "Cap" = cognitive resources only, "Full" = both.
  compar <- list("Info_Full_HghInc" = c("C", "M:C", "I:C", "M:I:C"),
                 "Cap_Full_HghInc" = c("I", "M:I", "I:C", "M:I:C"),
                 "Info_Full_LowInc" = c("C", "I:C"),
                 "Cap_Full_LowInc" = c("I", "I:C"))

  # Builds the comparison table for one model: one row per entry of
  # `comparisons`, with get_estse()'s estimate and standard error and the
  # p-value reduced to a significance flag at the 0.05 level.
  # lapply() + bind_rows() gives ordinary numeric columns; the earlier
  # sapply()/t() construction produced a list-matrix and list columns.
  build_comparison_table <- function(model, comparisons = compar) {
    lapply(comparisons, get_estse, m = model) %>%
      bind_rows(.id = "comp") %>%
      separate(comp, c("cond1", "cond2", "incentives")) %>%
      mutate(
        pval = factor(ifelse(pval > 0.05, "n.s.", "p-val<0.05"),
                      levels = c("p-val<0.05", "n.s.")),
        cond1 = factor(cond1, levels = c("Cap", "Info"),
                       labels = c("Cog. Res.", "Info.")),
        cond2 = factor(cond2, levels = c("Full"),
                       labels = c("Cog.Res. & Info.")),
        incentives = factor(incentives, levels = c("HghInc", "LowInc"),
                            labels = c("High", "Low"))
      ) %>%
      dplyr::select("Incentives" = incentives, "From" = cond1, "To" = cond2,
                    "Marg. Eff" = est, "Std. Err." = se, "P-Value" = pval)
  }

  # Main Study
  main_comp <- build_comparison_table(m_main)

  print(xtable(main_comp, digits = 3), file = "tab_A10.tex")

  # Pilot
  pretest_comp <- build_comparison_table(m_ptst)

  print(xtable(pretest_comp, digits = 3), file = "tab_A11.tex")
  
    
